
import os

import matplotlib.pyplot as plt
import pandas as pd
# Print the working directory; the workbook below is opened by relative path.
print(os.getcwd())

# Load the workbook and cast the 'salary' column to str so that the
# substring checks in extract_salary can be applied to every cell.
df = pd.read_excel('computer_1_old.xlsx')
df = df.astype({'salary': str})

# Recognised unit suffixes, checked in this order, paired with the factor
# that the leading number is multiplied by.  The factor 22 for the per-day
# unit is kept from the original code (presumably working days per month).
_SALARY_UNITS = (
    ('万/月', 10000),
    ('元/月', 1),
    ('元/天', 22),
)


def _parse_amount(text):
    """Return the number in *text*, averaging the two ends of a 'low-high' range."""
    low, dash, high = text.partition('-')
    if dash:
        return (float(low) + float(high)) / 2
    return float(low)


def extract_salary(value):
    """Convert a salary description into a single integer amount.

    The text before the first recognised unit suffix is parsed as either a
    single number (``'8000元/月'``) or a ``low-high`` range
    (``'1-1.5万/月'``), in which case the midpoint is used.  The number is
    then multiplied by the factor for its unit:

    * ``万/月`` -> x 10000
    * ``元/月`` -> x 1
    * ``元/天`` -> x 22

    Args:
        value: The salary text, e.g. ``'1-1.5万/月'`` or ``'150元/天'``.

    Returns:
        The amount rounded to the nearest integer, or ``None`` when *value*
        is not a string, carries no recognised unit (this includes
        ``'面议'``), or its numeric part cannot be parsed.
    """
    if not isinstance(value, str):
        return None

    for unit, factor in _SALARY_UNITS:
        if unit not in value:
            continue
        amount_text = value.partition(unit)[0]
        try:
            # Multiply before rounding so fractional amounts are not
            # truncated early; round() (rather than int()) also avoids
            # losing one unit to floating-point representation error.
            return round(_parse_amount(amount_text) * factor)
        except (ValueError, OverflowError):
            # Malformed number: treat like any other unrecognised format
            # instead of aborting the whole column conversion.
            return None

    return None

# Derive the numeric salary column, save the result to a new workbook and
# echo the frame to stdout.
df['salary_new'] = df['salary'].apply(extract_salary)
df.to_excel('computer_1_new.xlsx', sheet_name='sheet1')
print(df)

import matplotlib

# Set the KaiTi font for sans-serif text.
matplotlib.rcParams.update({'font.sans-serif': ['KaiTi']})

# Count how many rows share each salary value.
salary = df['salary_new'].value_counts()
print(salary)

# One bar per distinct salary value; the colour list is passed as-is.
salary.plot.bar(
    color=[
        'r',
        'g',
        'b',
        'c',
        'm',
        'y',
        'k',
    ],
)

# Write each count at the top of its bar.
for bar_index, count in enumerate(salary):
    plt.text(bar_index, count, str(count), ha='center', fontsize=12)

plt.title('工资分布', fontsize=16)
plt.xlabel('工资价格', fontsize=16)
plt.ylabel('数列', fontsize=16)
plt.tight_layout()
plt.show()